In [1]:
# Packages required (can be installed by pip3 install <package>)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pysal
from pysal.weights.util import get_points_array_from_shapefile
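The weights and Moran functions below use the pre-2.0 flat PySAL namespace (pysal.queen_from_shapefile, pysal.Kernel, pysal.Moran_BV, pysal.open); a quick version check, as a minimal sketch assuming a 1.x release where pysal.version is exposed
In [ ]:
# Confirm a PySAL 1.x release is installed; the flat namespace used in this
# notebook was split into separate packages from PySAL 2.0 onwards
print(pysal.version)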
In [3]:
# Build Census Ethnic dataframe reading csv file
datae = "~/Dropbox/Resolution - SP London/Data/Census/Original Data/London/Census data for Segregation" \
"/KS201ew_2011_oa_Ethnic/KS201EWDATA.csv"
dfce = pd.read_csv(datae)
dfce.head()
Out[3]:
In [4]:
# slice ethnic columns and sum each column
ethnic = dfce.loc[:,'W_Irish':'Other_eth']
print("Groups -------- Sum")
ethnic.sum()
Out[4]:
In [5]:
# plot ethnic group totals
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(16, 6), dpi=300)
plt.xlabel('Ethnic group')
plt.ylabel('Frequency sum')
plt.title('Ethnic intervals')
plt.ticklabel_format(style='plain', axis='y')
ethnic.sum().plot(kind='bar')
Out[5]:
Ethnic group counts by frequency sum, now including White British (W_British)
In [6]:
% matplotlib inline
# slice ethnic columns and sum each column
ethnic_full = dfce.loc[:,'W_British':'Other_eth']
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(16, 6), dpi=300)
plt.xlabel('Ethnic group')
plt.ylabel('Frequency sum')
plt.title('Ethnic intervals full')
plt.ticklabel_format(style='plain', axis='y')
ethnic_full.sum().plot(kind='bar')
Out[6]:
This data frame selects the raw group counts as a test for comparison with the proportion data
In [7]:
# Slice dataframe to get counts columns
cor_mat_count = ethnic_full.corr()
cor_mat_count
Out[7]:
In [8]:
# Plot Heatmap based on cor_mat_count data frame - Counts
% matplotlib inline
sns.set(context="notebook")
f1, ax1 = plt.subplots(figsize=(12, 9))
plt.title('Pearson correlation matrix - Ethnic groups count')
sns.heatmap(cor_mat_count, cmap='RdBu_r', square=True)
Out[8]:
This data frame selects the columns with percentages for each ethnic group
In [9]:
# Slice data frame to get proportion data columns
ethnic_prop = dfce.loc[:,'W.British':'Other.eth']
cor_mat_prop = ethnic_prop.corr()
cor_mat_prop
Out[9]:
In [10]:
# Plot Heatmap based on cor_mat_prop data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f2, ax2 = plt.subplots(figsize=(12, 9))
plt.title('Pearson correlation matrix - Ethnic groups percentage')
sns.heatmap(cor_mat_prop, square=True)
Out[10]:
Heatmap showing a positive correlation among the Black groups:
Black/African/Caribbean/Black British: African
Black/African/Caribbean/Black British: Caribbean
Black/African/Caribbean/Black British: Other Black
In [12]:
shp_path = "/Users/sandrofsousa/Dropbox/Resolution - SP London/Data/Shape files/" \
"London/OA Boundary/resolution_oa_2011_ks201-501ew.shp"
weight_cont = pysal.queen_from_shapefile(shp_path)
# weight_full = weight.full()
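Basic diagnostics of the contiguity weights before computing any Moran statistics (a minimal sketch; n, mean_neighbors and islands are standard attributes of PySAL W objects)
In [ ]:
# Number of output areas, average number of neighbors and any islands
# (observations with no contiguous neighbor)
print(weight_cont.n)
print(weight_cont.mean_neighbors)
print(weight_cont.islands)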
Histogram showing the distribution of neighbors in the queen weights
In [26]:
% matplotlib inline
sns.set(context="notebook", font_scale=1.8)
plt.figure(facecolor="white", figsize=(9, 6), dpi=300)
plt.xlabel('Neighbors')
plt.ylabel('Frequency')
plt.title('Queen weights histogram - neighbors relations')
plt.bar(*zip(*weight_cont.histogram))
Out[26]:
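weight_cont.histogram is a list of (number of neighbors, count) pairs, so the zip(*...) call above simply transposes it into x positions and bar heights; a more explicit equivalent as a minimal sketch
In [ ]:
# Equivalent, more explicit version of the bar-plot call above
cards, counts = zip(*weight_cont.histogram)
plt.bar(cards, counts)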
Compute bivariate Moran's I for all pairs of ethnic groups
Variables are compared pairwise, resulting in a matrix
999 random permutations are used to calculate pseudo p-values
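For reference, a single bivariate Moran call returns the statistic in .I and the permutation-based pseudo p-value in .p_z_sim; a minimal self-contained sketch on a synthetic lattice (random data, not the census variables)
In [ ]:
# Illustrative only: bivariate Moran's I between two random variables
# on a 10x10 lattice with contiguity weights
w_demo = pysal.lat2W(10, 10)
x = np.random.random(100)
y = np.random.random(100)
mbv = pysal.Moran_BV(x, y, w_demo, permutations=999)
print(mbv.I, mbv.p_z_sim)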
In [14]:
# Read the dbf associated with the same shapefile
# Note: the QGIS field length limit (10) changed column names during the join
def calc_moran(arrayi, arrayj, weight):
    # Bivariate Moran's I between two variables under a given spatial weight,
    # with a permutation-based pseudo p-value
    moran = pysal.Moran_BV(arrayi, arrayj, weight, permutations=999)
    result = moran.I
    pvalue = moran.p_z_sim
    return result, pvalue


def block_processing(i, weight, varnames, file):
    # Compare one variable (i) against every variable listed in varnames
    morans = []
    pvalues = []
    for item in varnames:
        j = np.array(file.by_col[item])
        serie = calc_moran(i, j, weight)
        moran = serie[0]
        pvalue = serie[1]
        morans.append(moran)
        pvalues.append(pvalue)
    return morans, pvalues


def calc_moran_matrix(varnames, weight, file):
    # Build the full pairwise matrices of Moran's I values and pseudo p-values
    moran_matrix = []
    pvalue_matrix = []
    for item in varnames:
        i = np.array(file.by_col[item])
        result = block_processing(i, weight, varnames, file)
        res_moran = result[0]
        res_pvalue = result[1]
        moran_matrix.append(res_moran)
        pvalue_matrix.append(res_pvalue)
    return moran_matrix, pvalue_matrix
In [15]:
# Call main function to compute Moran and P-values for ethnic groups
f1 = pysal.open("/Users/sandrofsousa/Dropbox/Resolution - SP London/Data/Shape files/" \
"London/OA Boundary/resolution_oa_2011_ks201-501ew.dbf")
var_ethnics = list(cor_mat_prop)
matrices_ethnic = calc_moran_matrix(var_ethnics, weight_cont, f1)
DataFrame with pairwise results
In [16]:
dfmoran_ethnic = pd.DataFrame(matrices_ethnic[0], columns=var_ethnics, index=var_ethnics)
dfmoran_ethnic
Out[16]:
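Unlike the Pearson matrix, the bivariate Moran matrix is not symmetric in general, since each entry relates one variable to the spatial lag of the other; the size of the asymmetry can be checked directly (a minimal sketch)
In [ ]:
# Largest absolute difference between I(i,j) and I(j,i)
print((dfmoran_ethnic - dfmoran_ethnic.T).abs().max().max())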
In [17]:
# Plot Heatmap based on dfmoran_ethnic data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f3, ax3 = plt.subplots(figsize=(12, 9))
plt.title('Moran I correlation matrix - Ethnic groups Queen weight')
sns.heatmap(dfmoran_ethnic, square=True)
Out[17]:
Heatmap showing a positive correlation among the Black groups, as was also seen in the Pearson correlation
P-values corresponding to the Moran matrix above
In [18]:
dfpvalue_ethnic = pd.DataFrame(matrices_ethnic[1], columns=var_ethnics, index=var_ethnics)
dfpvalue_ethnic
Out[18]:
In [19]:
# weight based on distance
shp_path2 = "/Users/sandrofsousa/Dropbox/Resolution - SP London/Data/Shape files/" \
"London/OA Boundary/resolution_oa_2011_ks201-501ew_cent.shp"
points = get_points_array_from_shapefile(shp_path2)
# 700m bandwidth with gaussian kernel function
weight_dist = pysal.Kernel(points, bandwidth=700.0, function='gaussian')
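The kernel object can be inspected before use: it stores the bandwidth applied to each observation and the distance-decayed weights of its neighbors (a minimal sketch, assuming the default integer ids 0..n-1)
In [ ]:
# Bandwidth applied to the first few output areas (fixed at 700m here)
# and the gaussian-decayed weights of the first OA's neighbors
print(weight_dist.bandwidth[:5])
print(weight_dist.weights[0])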
Histogram showing the distribution of neighbors in the 700m kernel weights
In [32]:
% matplotlib inline
sns.set(context="notebook", font_scale=1.8)
plt.figure(facecolor="white", figsize=(9, 6), dpi=300)
plt.xlabel('Neighbors')
plt.ylabel('Frequency')
plt.title('Kernel weights gaussian bw 700m - neighbors rel.')
plt.bar(*zip(*weight_dist.histogram))
Out[32]:
In [33]:
# Call main function to compute Moran and P-values for ethnic groups (700m kernel)
var_ethnics = list(cor_mat_prop)
matrices_ethnic_dist = calc_moran_matrix(var_ethnics, weight_dist, f1)
In [34]:
dfmoran_ethnic_dist = pd.DataFrame(matrices_ethnic_dist[0], columns=var_ethnics, index=var_ethnics)
dfmoran_ethnic_dist
Out[34]:
In [35]:
# Plot Heatmap based on dfmoran_ethnic_dist data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f3, ax3 = plt.subplots(figsize=(12, 9))
plt.title('Moran I correlation matrix - Ethnic groups 700m kernel gaussian weight')
sns.heatmap(dfmoran_ethnic_dist, square=True)
Out[35]:
Histogram showing the distribution of neighbors in the 2000m kernel weights
In [36]:
# 2000m bandwidth with gaussian kernel function
weight_dist2k = pysal.Kernel(points, bandwidth=2000.0, function='gaussian')
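The two kernels can be compared by their average number of neighbors; widening the bandwidth should enlarge each OA's neighborhood (a minimal sketch)
In [ ]:
# Average number of neighbors under the 700m and 2000m bandwidths
print(weight_dist.mean_neighbors, weight_dist2k.mean_neighbors)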
In [46]:
% matplotlib inline
sns.set(context="notebook", font_scale=1.8)
plt.figure(facecolor="white", figsize=(9, 6), dpi=300)
plt.xlabel('Neighbors')
plt.ylabel('Frequency')
plt.title('Kernel weights gaussian bw 2000m - neighbors rel.')
plt.bar(*zip(*weight_dist2k.histogram))
Out[46]:
In [38]:
# Call main function to compute Moran and P-values for ethnic groups (2000m kernel)
matrices_ethnic_dist2k = calc_moran_matrix(var_ethnics, weight_dist2k, f1)
In [39]:
dfmoran_ethnic_dist2k = pd.DataFrame(matrices_ethnic_dist2k[0], columns=var_ethnics, index=var_ethnics)
dfmoran_ethnic_dist2k
Out[39]:
In [40]:
# Plot Heatmap based on dfmoran_ethnic_dist2k data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f3, ax3 = plt.subplots(figsize=(12, 9))
plt.title('Moran I correlation matrix - Ethnic groups 2000m kernel gaussian weight')
sns.heatmap(dfmoran_ethnic_dist2k, square=True)
Out[40]:
In [48]:
# Build Census Qualification dataframe reading csv file
dataq = "~/Dropbox/Resolution - SP London/Data/Census/London/Census data for Segregation/"\
"ks501ew_2011_oa_Qualifications/KS501EWDATA.csv"
dfcq = pd.read_csv(dataq)
dfcq.head()
In [110]:
# slice qualification columns and sum each column
qualif = dfcq.loc[:,'No_quals':'FTs_18-74I']
qualif.sum()
Out[110]:
In [111]:
# plot qualification group totals
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(12, 6), dpi=300)
plt.xlabel('Qualification groups')
plt.ylabel('Frequency sum')
plt.title('Qualification intervals')
plt.ticklabel_format(style='plain', axis='y')
qualif.sum().plot(kind='bar')
Out[111]:
In [112]:
# Slice data frame to get proportion data columns
quali_prop = dfcq.loc[:,'No.quals':'FTs.18-74I']
cor_quali_prop = quali_prop.corr()
cor_quali_prop
Out[112]:
In [113]:
# Plot Heatmap based on cor_quali_prop data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f4, ax4 = plt.subplots(figsize=(12, 9))
plt.title('Pearson correlation matrix - Qualification groups percentage')
sns.heatmap(cor_quali_prop, square=True)
Out[113]:
In [47]:
# Call main function to compute Moran and P-values for qualification groups
var_quali = list(cor_quali_prop)
matrices_quali = calc_moran_matrix(var_quali, weight_cont, f1)
DataFrame with pairwise results
In [ ]:
dfmoran_quali = pd.DataFrame(matrices_quali[0], columns=var_quali, index=var_quali)
dfmoran_quali
In [ ]:
# Plot Heatmap based on dfmoran_quali data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f5, ax5 = plt.subplots(figsize=(12, 9))
plt.title('Moran I correlation matrix - Qualification groups Queen weight')
sns.heatmap(dfmoran_quali, square=True)
P-values corresponding to the Moran matrix above
In [ ]:
dfpvalue_quali = pd.DataFrame(matrices_quali[1], columns=var_quali, index=var_quali)
dfpvalue_quali
In [ ]:
# Call main function to compute Moran and P-values for qualification groups (700m kernel)
matrices_quali_dist = calc_moran_matrix(var_quali, weight_dist, f1)
In [ ]:
dfmoran_quali_dist = pd.DataFrame(matrices_quali_dist[0], columns=var_quali, index=var_quali)
dfmoran_quali_dist
In [ ]:
# Plot Heatmap based on dfmoran_quali_dist data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f3, ax3 = plt.subplots(figsize=(12, 9))
plt.title('Moran I correlation matrix - Qualification groups 700m kernel gaussian weight')
sns.heatmap(dfmoran_quali_dist, square=True)
In [ ]:
# Call main function to compute Moran and P-values for qualification groups (2000m kernel)
matrices_quali_dist2k = calc_moran_matrix(var_quali, weight_dist2k, f1)
In [ ]:
dfmoran_quali_dist2k = pd.DataFrame(matrices_quali_dist2k[0], columns=var_quali, index=var_quali)
dfmoran_quali_dist2k
In [ ]:
# Plot Heatmap based on dfmoran_quali_dist2k data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f3, ax3 = plt.subplots(figsize=(12, 9))
plt.title('Moran I correlation matrix - Qualification groups 2000m kernel gaussian weight')
sns.heatmap(dfmoran_quali_dist2k, square=True)
Counts by frequency sum (QS607EW - old table)
In [ ]:
# Build Census Occupation dataframe reading csv file
datao = "~/Dropbox/Resolution - SP London/Data/Census/London/old/qs607ew_2011_oa/"\
"RES Met London data/RES Met London QS607EW_oa_occupation_1.csv"
dfco = pd.read_csv(datao)
dfco.head()
In [ ]:
# slice occupation columns and sum each column
ocupation = dfco.loc[:,'QS607EW0002':'QS607EW0055']
ocupation.sum()
In [ ]:
# plot occupation group totals
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(18, 6), dpi=300)
plt.xlabel('Occupation groups')
plt.ylabel('Frequency sum')
plt.title('Occupation intervals')
plt.ticklabel_format(style='plain', axis='y')
ocupation.sum().plot(kind='bar')
In [ ]:
# slice higher occupation group columns and sum each column
higher = dfco[['QS607EW0002',
'QS607EW0011',
'QS607EW0019',
'QS607EW0024',
'QS607EW0031',
'QS607EW0036',
'QS607EW0044',
'QS607EW0050']]
higher.sum()
In [ ]:
# plot higher occupation group totals
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(10, 6), dpi=300)
plt.xlabel('Occupation higher groups')
plt.ylabel('Frequency sum')
plt.title('Occupation higher intervals')
plt.ticklabel_format(style='plain', axis='y')
higher.sum().plot(kind='bar')
In [3]:
# Build Census occupation dataframe reading csv file
dataoccup = "~/Dropbox/Resolution - SP London/Data/Census/Original Data/London/Census data for Segregation/" \
"qs606ew_2011_occupation/qs606ew_oa_occupation_minor.csv"
dfoccup = pd.read_csv(dataoccup)
dfoccup.head()
Out[3]:
In [5]:
# slice occupation columns and sum each column
occup_full = dfoccup.loc[:,'Managers':'S606EW0125']
# occup_full.sum()
Full occupation groups
In [4]:
# plot full occupation group totals
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(18, 6), dpi=300)
plt.xlabel('Occupation groups')
plt.ylabel('Frequency sum')
plt.title('Occupation intervals - QS606EW FULL')
plt.ticklabel_format(style='plain', axis='y')
occup_full.sum().plot(kind='bar')
Out[4]:
First level hierarchy
In [9]:
# slice occupation columns for first hierarchy level
hier_level1 = occup_full[['Managers',
'Prof_occup',
'Thecnical',
'Administ',
'Skl_trades',
'Care_other',
'Sales',
'Operative',
'Elementary']]
hier_level1.sum()
Out[9]:
In [10]:
labelsl1 = [
"Manag-direc-officials - 1",
"Professional occupations - 2",
"Assoc professional-tech occup - 3",
"Adm-secretarial occup - 4",
"Skilled trades occup - 5",
"Caring-leisure-other serv - 6",
"Sales-customer serv - 7",
"Process-plant-machine oper - 8",
"Elementary occup - 9"]
In [7]:
# plot first level hierarchy totals
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(10, 4), dpi=300)
plt.xlabel('Occupation groups')
plt.ylabel('Frequency sum')
plt.title('Occupation intervals - First level hierarchy')
plt.ticklabel_format(style='plain', axis='y')
figl1 = hier_level1.sum().plot(kind='bar')
lab1 = figl1.set_xticklabels(labelsl1)
Second level hierarchy
In [8]:
# slice occupation columns for second hierarchy level
hier_level2 = occup_full[['S606EW0003', 'S606EW0012', 'S606EW0018', 'S606EW0024', 'S606EW0028',
'S606EW0030', 'S606EW0039', 'S606EW0043', 'S606EW0046', 'S606EW0048',
'S606EW0052', 'S606EW0060', 'S606EW0066', 'S606EW0069', 'S606EW0071',
'S606EW0077', 'S606EW0081', 'S606EW0087', 'S606EW0091', 'S606EW0097',
'S606EW0101', 'S606EW0105', 'S606EW0110', 'S606EW0115', 'S606EW0119']]
# hier_level2.sum()
In [11]:
labelsl2 = [
"Corp manag-directors - 1.2",
"Other manag-proprietors - 1.3",
"Science-research-eng-tech prof - 2.1",
"Health professionals - 2.2",
"Teaching-educational prof - 2.3",
"Business-media-public serv - 2.4",
"Science-eng-tech associate - 3.1",
"Health-social care associate - 3.2",
"Protective service occup - 3.3",
"Culture-media-sports occup - 3.4",
"Business-public serv assoc - 3.5",
"Administrative occupations - 4.1",
"Secretarial-related occup - 4.2",
# "Skilled agricult-related trades - 5.1",
"Skilled metal-electric trades - 5.2",
"Skilled const-building trades - 5.3",
"Textiles-printing-other skilled - 5.4",
"Caring personal serv occup - 6.1",
"Leisure-travel-related serv - 6.2",
"Sales occupations - 7.2",
"Customer serv occup - 7.3",
"Process-plant-machine oper - 8.1",
"Transport-machine driver-oper - 8.2",
"Element trades-related occup - 9.1",
"Element adm-service occup - 9.2"]
In [1]:
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(12, 6), dpi=300)
plt.xlabel('Occupation groups')
plt.ylabel('Frequency sum')
plt.title('Occupation intervals - Second level hierarchy')
plt.ticklabel_format(style='plain', axis='y')
fig = hier_level2.sum().plot(kind='bar')
plt.tight_layout()
lab2 = fig.set_xticklabels(labelsl2)
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Plots/Occupation_Hist_hierl2.png")
Third level hierarchy
In [12]:
# slice occupation columns for third hierarchy level
# removed: S606EW0013,S606EW0022,S606EW0057,S606EW0070,S606EW0116
hier_level3 = occup_full[[
'S606EW0004','S606EW0005','S606EW0006','S606EW0007','S606EW0008','S606EW0009','S606EW0010','S606EW0011',
'S606EW0014','S606EW0015','S606EW0016','S606EW0019','S606EW0020','S606EW0021',
'S606EW0023','S606EW0025','S606EW0026','S606EW0027','S606EW0029','S606EW0031','S606EW0032','S606EW0033',
'S606EW0034','S606EW0035','S606EW0036','S606EW0037','S606EW0040','S606EW0041','S606EW0042','S606EW0044',
'S606EW0045','S606EW0047','S606EW0049','S606EW0050','S606EW0051','S606EW0053','S606EW0054','S606EW0055',
'S606EW0056','S606EW0058','S606EW0061','S606EW0062','S606EW0063','S606EW0064','S606EW0065',
'S606EW0067','S606EW0072','S606EW0073','S606EW0074','S606EW0075','S606EW0076','S606EW0078',
'S606EW0079','S606EW0080','S606EW0082','S606EW0083','S606EW0084','S606EW0085','S606EW0088','S606EW0089',
'S606EW0090','S606EW0092','S606EW0093','S606EW0094','S606EW0095','S606EW0098','S606EW0099','S606EW0100',
'S606EW0102','S606EW0103','S606EW0106','S606EW0107','S606EW0108','S606EW0109','S606EW0111','S606EW0112',
'S606EW0113','S606EW0117','S606EW0118','S606EW0120','S606EW0121','S606EW0122','S606EW0123',
'S606EW0124','S606EW0125']]
# hier_level3 = occup_full[[
# 'S606EW0004','S606EW0005','S606EW0006','S606EW0007','S606EW0008','S606EW0009','S606EW0010','S606EW0011',
# 'S606EW0013','S606EW0014','S606EW0015','S606EW0016','S606EW0019','S606EW0020','S606EW0021','S606EW0022',
# 'S606EW0023','S606EW0025','S606EW0026','S606EW0027','S606EW0029','S606EW0031','S606EW0032','S606EW0033',
# 'S606EW0034','S606EW0035','S606EW0036','S606EW0037','S606EW0040','S606EW0041','S606EW0042','S606EW0044',
# 'S606EW0045','S606EW0047','S606EW0049','S606EW0050','S606EW0051','S606EW0053','S606EW0054','S606EW0055',
# 'S606EW0056','S606EW0057','S606EW0058','S606EW0061','S606EW0062','S606EW0063','S606EW0064','S606EW0065',
# 'S606EW0067','S606EW0070','S606EW0072','S606EW0073','S606EW0074','S606EW0075','S606EW0076','S606EW0078',
# 'S606EW0079','S606EW0080','S606EW0082','S606EW0083','S606EW0084','S606EW0085','S606EW0088','S606EW0089',
# 'S606EW0090','S606EW0092','S606EW0093','S606EW0094','S606EW0095','S606EW0098','S606EW0099','S606EW0100',
# 'S606EW0102','S606EW0103','S606EW0106','S606EW0107','S606EW0108','S606EW0109','S606EW0111','S606EW0112',
# 'S606EW0113','S606EW0116','S606EW0117','S606EW0118','S606EW0120','S606EW0121','S606EW0122','S606EW0123',
# 'S606EW0124','S606EW0125']]
# hier_level3.sum()
In [13]:
labelsl3 = [
"Chief Exec-Sen Offic - 1.2.1",
"Produc Manag-Directors - 1.2.2",
"Func Manag-Directors - 1.2.3",
"Finan Inst Manag-Directors - 1.2.4",
"Manag-Directors Transp-Log - 1.2.5",
"Sen Offic Protect Serv - 1.2.6",
"Health-Social Serv Manag Dir - 1.2.7",
"Manag-Dir Retail Wholesale - 1.2.8",
# "Manag-Propriet Agriculture - 1.3.1",
"Manag-Propriet Hospitality - 1.3.2",
"Manag-Propriet Health Serv - 1.3.3",
"Manag-Proprietors Other Serv - 1.3.4",
"Natural-Social Science Prof - 2.1.1",
"Engineering Prof - 2.1.2",
"Information Tech-Telecom Prof - 2.1.3",
# "Conservation-Environ Prof - 2.1.4",
"Research-Development Manag - 2.1.5",
"Health Prof - 2.2.1",
"Therapy Prof - 2.2.2",
"Nursing and Midwifery Prof - 2.2.3",
"Teaching-Educational Prof - 2.3.1",
"Legal Prof - 2.4.1",
"Business-Research-Adm Prof - 2.4.2",
"Architec-Town Plan-Surveyors - 2.4.3",
"Welfare Prof - 2.4.4",
"Librarians and Prof - 2.4.5",
"Quality and Regulatory Prof - 2.4.6",
"Media Prof - 2.4.7",
"Science-Eng-Production Tech - 3.1.1",
"Draughtsperson-Architec Tech - 3.1.2",
"Information Technology Tech - 3.1.3",
"Health Assoc Prof - 3.2.1",
"Welfare-Housing Assoc Prof - 3.2.2",
"Protective Serv Occup - 3.3.1",
"Artistic-Literary-Media Occup - 3.4.1",
"Design Occup - 3.4.2",
"Sports and Fitness Occup - 3.4.3",
"Transport Assoc Prof - 3.5.1",
"Legal Assoc Prof - 3.5.2",
"Business-Finance-Assoc Prof - 3.5.3",
"Sales-Marketing-Assoc Prof - 3.5.4",
# "Conservat-Environ Assoc Prof - 3.5.5",
"Public Serv-Other Assoc Prof - 3.5.6",
"Adm Occup: Govern-Organis - 4.1.1",
"Adm Occup: Finance - 4.1.2",
"Adm Occup: Records - 4.1.3",
"Other Adm Occup - 4.1.4",
"Adm Occup: Offic Manag Sup - 4.1.5",
"Secretarial and Occup - 4.2.1",
# "Agricultural and Trades - 5.1.1",
"Metal Form-Welding Trades - 5.2.1",
"Metal Machin-Fit-Instrument - 5.2.2",
"Vehicle Trades - 5.2.3",
"Electrical-Electronic Trades - 5.2.4",
"Skilled Metal-Elec Trades Sup - 5.2.5",
"Construction-Building Trades - 5.3.1",
"Building Finishing Trades - 5.3.2",
"Constru-Building Trades Sup - 5.3.3",
"Textiles-Garments Trades - 5.4.1",
"Printing Trades - 5.4.2",
"Food Prep-Hospitality Trades - 5.4.3",
"Other Skilled Trades - 5.4.4",
"Childcare-Personal Serv - 6.1.1",
"Animal Care-Control Serv - 6.1.2",
"Caring Personal Serv - 6.1.3",
"Leisure and Travel Serv - 6.2.1",
"Hairdressers and Serv - 6.2.2",
"Housekeeping and Serv - 6.2.3",
"Clean-Housekeep Manag Sup - 6.2.4",
"Sales Assist-Retail Cashiers - 7.2.1",
"Sales Occup - 7.2.2",
"Sales Supervisors - 7.2.3",
"Customer Serv Occup - 7.3.1",
"Customer Manag-Superv - 7.3.2",
"Process Oper - 8.1.1",
"Plant and Machine Oper - 8.1.2",
"Assemblers-Routine Oper - 8.1.3",
"Construction Oper - 8.1.4",
"Road Transport Drivers - 8.2.1",
"Mob Machin Drivers-Oper - 8.2.2",
"Other Driver-Transport Oper - 8.2.3",
# "Element Agricultural Occup - 9.1.1",
"Element Construct Occup - 9.1.2",
"Element Process Plant Occup - 9.1.3",
"Element Administ Occup - 9.2.1",
"Element Cleaning Occup - 9.2.2",
"Element Security Occup - 9.2.3",
"Element Sales Occup - 9.2.4",
"Element Storage Occup - 9.2.5",
"Other Element Serv Occup - 9.2.6"]
In [69]:
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(20, 8), dpi=300)
plt.xlabel('Occupation groups')
plt.ylabel('Frequency sum')
plt.title('Occupation intervals - Third level hierarchy - reprocessed', fontsize=16)
plt.ticklabel_format(style='plain', axis='y')
fig = hier_level3.sum().plot(kind='bar')
lab3 = fig.set_xticklabels(labelsl3)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Plots/Occupation_Hist_hierl3.png")
In [6]:
# Group third level classes to build the second and first levels, accounting for the deleted classes
dfoccup['s12'] = dfoccup[['S606EW0004','S606EW0005','S606EW0006','S606EW0007','S606EW0008','S606EW0009','S606EW0010','S606EW0011']].sum(axis=1)
dfoccup['s13'] = dfoccup[['S606EW0014','S606EW0015','S606EW0016']].sum(axis=1)
dfoccup['s21'] = dfoccup[['S606EW0019','S606EW0020','S606EW0021','S606EW0023']].sum(axis=1)
dfoccup['s22'] = dfoccup[['S606EW0025','S606EW0026','S606EW0027']].sum(axis=1)
dfoccup['s23'] = dfoccup[['S606EW0029']].sum(axis=1)
dfoccup['s24'] = dfoccup[['S606EW0031','S606EW0032','S606EW0033','S606EW0034','S606EW0035','S606EW0036','S606EW0037']].sum(axis=1)
dfoccup['s31'] = dfoccup[['S606EW0040','S606EW0041','S606EW0042']].sum(axis=1)
dfoccup['s32'] = dfoccup[['S606EW0044','S606EW0045']].sum(axis=1)
dfoccup['s33'] = dfoccup[['S606EW0047']].sum(axis=1)
dfoccup['s34'] = dfoccup[['S606EW0049','S606EW0050','S606EW0051']].sum(axis=1)
dfoccup['s35'] = dfoccup[['S606EW0053','S606EW0054','S606EW0055','S606EW0056','S606EW0058']].sum(axis=1)
dfoccup['s41'] = dfoccup[['S606EW0061','S606EW0062','S606EW0063','S606EW0064','S606EW0065']].sum(axis=1)
dfoccup['s42'] = dfoccup[['S606EW0067']].sum(axis=1)
dfoccup['s52'] = dfoccup[['S606EW0072','S606EW0073','S606EW0074','S606EW0075','S606EW0076']].sum(axis=1)
dfoccup['s53'] = dfoccup[['S606EW0078','S606EW0079','S606EW0080']].sum(axis=1)
dfoccup['s54'] = dfoccup[['S606EW0082','S606EW0083','S606EW0084','S606EW0085']].sum(axis=1)
dfoccup['s61'] = dfoccup[['S606EW0088','S606EW0089','S606EW0090']].sum(axis=1)
dfoccup['s62'] = dfoccup[['S606EW0092','S606EW0093','S606EW0094','S606EW0095']].sum(axis=1)
dfoccup['s72'] = dfoccup[['S606EW0098','S606EW0099','S606EW0100']].sum(axis=1)
dfoccup['s73'] = dfoccup[['S606EW0102','S606EW0103']].sum(axis=1)
dfoccup['s81'] = dfoccup[['S606EW0106','S606EW0107','S606EW0108','S606EW0109']].sum(axis=1)
dfoccup['s82'] = dfoccup[['S606EW0111','S606EW0112','S606EW0113']].sum(axis=1)
dfoccup['s91'] = dfoccup[['S606EW0117','S606EW0118']].sum(axis=1)
dfoccup['s92'] = dfoccup[['S606EW0120','S606EW0121','S606EW0122','S606EW0123','S606EW0124','S606EW0125']].sum(axis=1)
# First level
dfoccup['s1'] = dfoccup[['s12','s13']].sum(axis=1)
dfoccup['s2'] = dfoccup[['s21','s22','s23','s24']].sum(axis=1)
dfoccup['s3'] = dfoccup[['s31','s32','s33','s34','s35']].sum(axis=1)
dfoccup['s4'] = dfoccup[['s41','s42']].sum(axis=1)
dfoccup['s5'] = dfoccup[['s52','s53','s54']].sum(axis=1)
dfoccup['s6'] = dfoccup[['s61','s62']].sum(axis=1)
dfoccup['s7'] = dfoccup[['s72','s73']].sum(axis=1)
dfoccup['s8'] = dfoccup[['s81','s82']].sum(axis=1)
dfoccup['s9'] = dfoccup[['s91','s92']].sum(axis=1)
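A quick consistency check on the regrouping: the first level columns are built directly from the second level ones, so their totals should match (a minimal sketch)
In [ ]:
# Totals of the reprocessed first and second level hierarchies should agree
print(dfoccup.loc[:, 's1':'s9'].sum().sum())
print(dfoccup.loc[:, 's12':'s92'].sum().sum())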
In [8]:
# Save data with new columns to csv
csv_temp = "~/Downloads/occupation_grouped_qs606oa.csv"
dfoccup.loc[:,'Geocode':'s9'].to_csv(csv_temp) #save to csv
In [52]:
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(12, 7), dpi=300)
plt.xlabel('Occupation groups')
plt.ylabel('Frequency sum')
plt.title('Occupation intervals - Second level hierarchy - reprocessed', fontsize=16)
plt.ticklabel_format(style='plain', axis='y')
figr = dfoccup.loc[:,'s12':'s92'].sum().plot(kind='bar')
lab3r = figr.set_xticklabels(labelsl2)
plt.tight_layout()
In [62]:
% matplotlib inline
sns.set_style("darkgrid")
sns.set_context("talk", font_scale=0.7, rc={"lines.linewidth": 1.5})
plt.figure(facecolor="white", figsize=(10, 6), dpi=300)
plt.xlabel('Occupation groups')
plt.ylabel('Frequency sum')
plt.title('Occupation intervals - First level hierarchy - reprocessed')
plt.ticklabel_format(style='plain', axis='y')
figr = dfoccup.loc[:,'s1':'s9'].sum().plot(kind='bar')
lab3r = figr.set_xticklabels(labelsl1)
plt.tight_layout()
In [14]:
# Slice data frame to get the occupation columns (counts)
occup_prop = dfoccup.loc[:,'Managers':'S606EW0125']
cor_occup_prop = occup_prop.corr()
cor_occup_prop.head()
Out[14]:
In [15]:
# Plot Heatmap based on cor_occup_prop data frame - Counts
% matplotlib inline
sns.set(context="notebook")
f4, ax4 = plt.subplots(figsize=(26, 21), dpi=300)
plt.title('Pearson correlation matrix - Occupation groups counts - full', fontsize=25)
sns.heatmap(cor_occup_prop, square=True)
f4.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_full.png")
First level Hierarchy
In [16]:
cor_occup_hier1 = hier_level1.corr()
cor_occup_hier1.head()
Out[16]:
In [17]:
# Plot Heatmap based on cor_occup_hier1 data frame
% matplotlib inline
sns.set(context="notebook")
f6, ax6 = plt.subplots(figsize=(9, 6), dpi=300)
plt.title('Pearson correlation matrix - Occupation groups first level hierarchy')
sns.heatmap(cor_occup_hier1, square=True)
f6.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_.png")
In [74]:
# Plot Heatmap based on the reprocessed first level hierarchy (s1-s9)
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12, 9), dpi=300)
plt.title('Pearson correlation matrix - Occupation first level hierarchy - reprocessed')
sns.heatmap(dfoccup.loc[:,'s1':'s9'].corr(), square=True, xticklabels=labelsl1, yticklabels=labelsl1)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_.png")
Second level Hierarchy
In [18]:
cor_occup_hier2 = hier_level2.corr()
cor_occup_hier2.head()
Out[18]:
In [19]:
# Plot Heatmap based on cor_occup_hier2 data frame
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12, 9), dpi=300)
plt.title('Pearson correlation matrix - Occupation groups second level hierarchy')
f7 = sns.heatmap(cor_occup_hier2, square=True)
f7.set_xticklabels(labelsl2)
f7.set_yticklabels(reversed(labelsl2))
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_hierl2.png")
In [72]:
# Plot Heatmap based on the reprocessed second level hierarchy (s12-s92)
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12, 9), dpi=300)
plt.title('Pearson correlation matrix - Occupation second level hierarchy - reprocessed')
sns.heatmap(dfoccup.loc[:,'s12':'s92'].corr(), square=True, xticklabels=labelsl2, yticklabels=labelsl2)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_.png")
Third level Hierarchy
In [24]:
cor_occup_hier3 = hier_level3.corr()
cor_occup_hier3.head()
Out[24]:
In [75]:
# Plot Heatmap based on cor_occup_hier3 data frame
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(26, 21), dpi=300)
plt.title('Pearson correlation matrix - Occupation third level hierarchy - reprocessed', fontsize=20)
f8 = sns.heatmap(cor_occup_hier3, square=True, xticklabels=labelsl3, yticklabels=labelsl3)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_pearson_hierl3.png")
In [23]:
# Save data with new columns to csv
csvoccup = "~/Dropbox/Resolution - SP London/Data/Census/London/Census data for Segregation/"\
"qs606ew_2011_occupation/qs606ew_reprocessed.csv"
dfoccup.loc[:,['Geocode','s12','s13','s21','s22','s23','s24','s31','s32','s33','s34','s35','s41','s42','s52','s53','s54',
's61','s62','s72','s73','s81','s82','s91','s92','s1','s2','s3','s4','s5','s6','s7','s8','s9']].to_csv(csvoccup, index=False)
In [25]:
shp_path3 = "/Users/sandrofsousa/Downloads/reprocessing/resolution_oa_2011_qs606ew.shp"
weight_cont_occup = pysal.queen_from_shapefile(shp_path3)
Histogram showing the distribution of neighbors in the queen weights
In [27]:
% matplotlib inline
sns.set(context="notebook")
plt.figure(facecolor="white", figsize=(6, 3), dpi=300)
plt.xlabel('Neighbors')
plt.ylabel('Frequency')
plt.title('Queen weights histogram - neighbors relations for occupation')
plt.bar(*zip(*weight_cont_occup.histogram))
Out[27]:
Compute bivariate Moran's I for all pairs of occupation groups
Variables are compared pairwise, resulting in a matrix; 999 random permutations are used to calculate pseudo p-values
In [27]:
# Open the dbf associated with the occupation shapefile
f2 = pysal.open("/Users/sandrofsousa/Downloads/reprocessing/resolution_oa_2011_qs606ew_cent.dbf")
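Since the QGIS join may also have truncated field names in this dbf, it is worth confirming they match the occupation column names before building the matrix (a minimal sketch; header lists the dbf field names)
In [ ]:
# First few field names available in the occupation dbf
print(f2.header[:10])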
In [ ]:
var_occup = list(cor_occup_prop)
matrices_occup = calc_moran_matrix(var_occup, weight_cont_occup, f2)
DataFrame with pairwise results
In [ ]:
dfmoran_occup = pd.DataFrame(matrices_occup[0], columns=var_occup, index=var_occup)
dfmoran_occup.head()
In [ ]:
# Plot Heatmap based on dfmoran_occup data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
f5, ax5 = plt.subplots(figsize=(26, 21), dpi=300)
plt.title('Moran I correlation matrix - Occupation groups Queen weight', fontsize=25)
sns.heatmap(dfmoran_occup, square=True)
f5.tight_layout()
plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_full.png")
Bivariate Moran's I with Queen weights for the first hierarchy level
In [81]:
var_occup_hier1 = list(hier_level1)
matrices_occupl1 = calc_moran_matrix(var_occup_hier1, weight_cont_occup, f2)
In [83]:
dfmoran_occup_hier1 = pd.DataFrame(matrices_occupl1[0], columns=var_occup_hier1, index=var_occup_hier1)
dfmoran_occup_hier1.head()
Out[83]:
In [94]:
# Plot Heatmap based on dfmoran_occup_hier1 data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12,9), dpi=300)
plt.title('Moran I correlation matrix - Occupation groups Queen weight Hierarchy level 1')
h1 = sns.heatmap(dfmoran_occup_hier1, square=True)
h1.set_xticklabels(labelsl1, rotation='vertical')
h1.set_yticklabels(reversed(labelsl1), rotation='horizontal')
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl1.png")
In [28]:
var_occup_repl1 = list(dfoccup.loc[:,'s1':'s9'])
matrices_occuprep1 = calc_moran_matrix(var_occup_repl1, weight_cont_occup, f2)
In [30]:
dfmoran_occup_rep1 = pd.DataFrame(matrices_occuprep1[0], columns=var_occup_repl1, index=var_occup_repl1)
# Plot Heatmap based on dfmoran_occup_rep1 data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12,9), dpi=300)
plt.title('Moran I correlation matrix - Occupation Queen weight Hierarchy level 1 - reprocessed')
h1 = sns.heatmap(dfmoran_occup_rep1, square=True, xticklabels=labelsl1, yticklabels=labelsl1)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl1.png")
Bivariate Moran's I with Queen weights for the second hierarchy level
In [95]:
var_occup_hier2 = list(hier_level2)
matrices_occupl2 = calc_moran_matrix(var_occup_hier2, weight_cont_occup, f2)
In [96]:
dfmoran_occup_hier2 = pd.DataFrame(matrices_occupl2[0], columns=var_occup_hier2, index=var_occup_hier2)
dfmoran_occup_hier2.head()
Out[96]:
In [98]:
# Plot Heatmap based on dfmoran_occup_hier2 data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(16,12), dpi=300)
plt.title('Moran I correlation matrix - Occupation groups Queen weight Hierarchy level 2')
h2 = sns.heatmap(dfmoran_occup_hier2, square=True)
h2.set_xticklabels(labelsl2)
h2.set_yticklabels(reversed(labelsl2))
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl2.png")
In [31]:
var_occup_repl2 = list(dfoccup.loc[:,'s12':'s92'])
matrices_occuprep2 = calc_moran_matrix(var_occup_repl2, weight_cont_occup, f2)
In [34]:
dfmoran_occup_rep2 = pd.DataFrame(matrices_occuprep2[0], columns=var_occup_repl2, index=var_occup_repl2)
# Plot Heatmap based on dfmoran_occup_rep2 data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(12,9), dpi=300)
plt.title('Moran I correlation matrix - Occupation Queen weight Hierarchy level 2 - reprocessed')
h1r = sns.heatmap(dfmoran_occup_rep2, square=True, xticklabels=labelsl2, yticklabels=labelsl2)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl1.png")
Bivariate Moran's I with Queen weights for the third hierarchy level
In [35]:
var_occup_hier3 = list(hier_level3)
matrices_occupl3 = calc_moran_matrix(var_occup_hier3, weight_cont_occup, f2)
In [36]:
dfmoran_occup_hier3 = pd.DataFrame(matrices_occupl3[0], columns=var_occup_hier3, index=var_occup_hier3)
dfmoran_occup_hier3.head()
Out[36]:
In [37]:
# Plot Heatmap based on dfmoran_occup_hier3 data frame - Proportions
% matplotlib inline
sns.set(context="notebook")
plt.subplots(figsize=(26, 21), dpi=300)
plt.title('Moran I correlation matrix - Occupation Queen weight Hierarchy level 3 - reprocessed', fontsize=25)
h3 = sns.heatmap(dfmoran_occup_hier3, square=True, xticklabels=labelsl3, yticklabels=labelsl3)
plt.tight_layout()
# plt.savefig("/Users/sandrofsousa/Dropbox/Resolution - SP London/Documents/Histograms/Occupation_moran_hierl3.png")